Exploring dual-cue approach
library(dplyr)
Attaching package: ‘dplyr’
The following objects are masked from ‘package:stats’:
filter, lag
The following objects are masked from ‘package:base’:
intersect, setdiff, setequal, union
library(ggplot2)
library(here)
here() starts at /Users/avrilwang/Desktop/Project-Plasmodium
library(deSolve)
library(crone)
library(optimParallel)
Loading required package: parallel
library(doParallel)
Loading required package: foreach
Loading required package: iterators
library(doRNG)
Loading required package: rngtools
library(arrow)
Attaching package: ‘arrow’
The following object is masked from ‘package:utils’:
timestamp
library(stringr)
library(parallel)
library(ggpubr)
library(microbenchmark)
library(profvis)
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
library(RANN)
parameters_tsukushi <- c(R1 = 8.89*(10^6), # slightly higher
lambda = 3.7*(10^5),
mu = 0.025,
p = 8*(10^-6), # doubled form original
alpha = 1,
alphag = 2,
beta = 5.721,
mum = 48,
mug = 4,
I0 = 43.85965,
Ig0 = 0,
a = 150,
b = 100,
sp = 1,
psin = 16.69234,
psiw = 0.8431785,
phin = 0.03520591,
phiw = 550.842,
iota = 2.18*(10^6),
rho = 0.2627156)
I_range_none <- seq(0, 6*(10^6), by = (6*(10^6))/500)
I_range_log <- seq(0, log10(6*(10^6)), by = (log10(6*(10^6)))/500)
G_range_none <- seq(0, 6*(10^4), by = (6*(10^4))/500)
G_range_log <- seq(0, log10(6*(10^4)), by = (log10(6*(10^4)))/500)
testing out mgcv dual cue spline surface. looks good.
note that if we go with ti, than we have only interactions between the 2 cues whereas te includes both main effect and interaction. ti gives us 5 parameters to optimze whereas te gives us 9. We should compare the two
source(here("functions/chabaudi_si_clean.R"))
# smaller division
cue_range <- seq(0, 6*(10^6), by = (6*(10^6))/500)
cue_range_b <- seq(0, 6*(10^4), by = (6*(10^4))/500)
parameters_cr <- rep(-0.5,9)
cr_grid <- expand.grid(cue_range, cue_range_b)
## rename
names(cr_grid) <- c("cue_range", "cue_range_b")
## create dummy y
dummy_y <- runif(length(cue_range_b), 0, 1)
## put together df
dummy_df <- data.frame(cue_range, cue_range_b, dummy_y)
## gam model
dummy_cr.mod <- mgcv::gam(dummy_y ~ te(cue_range, cue_range_b,
bs = c("tp", "tp")
k = c(3,3)),
data = dummy_df,
method = "REML")
## assign parameters
dummy_cr.mod$coefficients <- parameters_cr
# exponential transformation to limit conversion rate to between 0 and 1
cr_fun <- function(cue_1, cue_2){
res <- exp(-exp(mgcv::predict.gam(dummy_cr.mod,
newdata = data.frame("cue_range" = cue_1,
"cue_range_b" = cue_2))))
return(res)
}
cutting down computational time
source(here("functions/chabaudi_si_clean.R"))
source(here("functions/test.R"))
# each run takes around 1 minute (cr_fun) (100 divisions)
# switching to nn2 takes 145 seconds
# increasing divisons to 500 using predict.gam takes still around 57 seconds. So increasing to 500 doesn't seem like a bad idea
system.time(test(parameters_cr = rep(0.5, 5),
immunity = "tsukushi",
parameters = parameters_tsukushi,
time_range = seq(0, 20, 0.001),
cue = "I",
cue_b = "G",
cue_range = I_range_log,
cue_range_b = G_range_log,
solver = "vode",
log_cue = "log10",
log_cue_b = "log10"
))
making shit go fast! Need to cut down on time to get cr from function
cue_range <- I_range_log
cue_range_b <- G_range_log
cr_grid <- expand.grid(cue_range, cue_range_b)
## rename
names(cr_grid) <- c("cue_range", "cue_range_b")
## create dummy y
dummy_y <- runif(length(cue_range_b), 0, 1)
## put together df
dummy_df <- data.frame(cue_range, cue_range_b, dummy_y)
## gam model
dummy_cr.mod <- mgcv::bam(dummy_y ~ ti(cue_range, cue_range_b,
k = c(3,3)),
data = dummy_df)
## assign parameters
dummy_cr.mod$coefficients <- rep(0.5,5)
# original function of getting cr
cr_fun <- function(cue_1, cue_2){
exp(-exp(mgcv::predict.bam(dummy_cr.mod,
newdata = data.frame("cue_range" = cue_1,
"cue_range_b" = cue_2),
n.threads = 3)))
}
cr_res <- exp(-exp(mgcv::predict.gam(dummy_cr.mod,
newdata = cr_grid)))
cr_df <- cbind(cr_grid, cr_res)
cue_1 <- 1
cue_2 <- 0.5
# another function for nearnest neighoubrs
nn_fun <- function(cue_1, cue_2){
cr_df[nn2(data=cr_grid, query = cbind(cue_1, cue_2), k=1, treetype= "kd",
searchtype = "priority")[[1]],3]
}
nn_fun2 <- function(cue_1, cue_2){
cr_df[FNN::knnx.index(data=cr_grid, query = cbind(cue_1, cue_2), k = 1, algorithm = "kd_tree"),3]
}
# very similar
cr_fun(1,1)
nn_fun(1,1)
nn_fun2(1,1)
# gam.predict is faster
microbenchmark(cr_fun(1,1))
microbenchmark(nn_fun(1,1))
microbenchmark(nn_fun2(1,1))
sanity checkz
source(here("functions/test.R"))
# switching between cue and cue_b. should not make a difference
# 4.469067
test(parameters_cr = rep(0.5, 5),
immunity = "tsukushi",
parameters = parameters_tsukushi,
time_range = seq(0, 20, 0.01),
cue = "I",
cue_b = "G",
cue_range = I_range_log,
cue_range_b = G_range_log,
solver = "vode",
log_cue = "log10",
log_cue_b = "log10"
)
# 4.469066. good
test(parameters_cr = rep(0.5, 5),
immunity = "tsukushi",
parameters = parameters_tsukushi,
time_range = seq(0, 20, 0.01),
cue = "G",
cue_b = "I",
cue_range = G_range_log,
cue_range_b = I_range_log,
solver = "vode",
log_cue = "log10",
log_cue_b = "log10"
)
# short time steps vs longer time steps. barely made a difference
# 4.46904
test(parameters_cr = rep(0.5, 5),
immunity = "tsukushi",
parameters = parameters_tsukushi,
time_range = seq(0, 20, 0.001),
cue = "I",
cue_b = "G",
cue_range = I_range_log,
cue_range_b = G_range_log,
solver = "vode",
log_cue = "log10",
log_cue_b = "log10"
)
# changing logs on cue_b
# 6.20639
test(parameters_cr = rep(0.5, 5),
immunity = "tsukushi",
parameters = parameters_tsukushi,
time_range = seq(0, 20, 0.01),
cue = "I",
cue_b = "G",
cue_range = I_range_log,
cue_range_b = G_range_none,
solver = "vode",
log_cue = "log10",
log_cue_b = "none"
)
# changing logs on cue
# 5.110873
test(parameters_cr = rep(0.5, 5),
immunity = "tsukushi",
parameters = parameters_tsukushi,
time_range = seq(0, 20, 0.01),
cue = "I",
cue_b = "G",
cue_range = I_range_none,
cue_range_b = G_range_log,
solver = "vode",
log_cue = "none",
log_cue_b = "log10"
)
# checing single cue infection. same
## 4.724804
test(parameters_cr = rep(0.5, 4),
immunity = "tsukushi",
parameters = parameters_tsukushi,
time_range = seq(0, 20, 0.01),
cue = "I",
cue_range = I_range_none,
solver = "vode",
log_cue = "none",
)
# 4.724804
chabaudi_si_clean(parameters_cr = rep(0.5, 4),
immunity = "tsukushi",
parameters = parameters_tsukushi,
time_range = seq(0, 20, 0.01),
cue = "I",
cue_range = I_range_none,
solver = "vode",
log_cue = "none",
)
#————————————————–# # remaking of co-infection reaction norm #————————————————–#
source(here("functions/chabaudi_ci_clean.R"))
source(here("functions/par_to_df.R"))
function for getting conversion rate reaction norm and dynamics
get_ci_dyn <- function(df){
# get heavisde transformation
heaviside_trans <- function(cue_range, max){
crone::heaviside(cue_range)*(cue_range)+(crone::heaviside(cue_range-max)*(max-cue_range))
}
# read in df info
cue_1 <- ifelse(stringr::str_detect(df$cue, "-i"),
gsub("*-i", "1", df$cue),
df$cue)
cue_2 <- ifelse(stringr::str_detect(df$cue, "-i"),
gsub("*-i", "2", df$cue),
df$cue)
log <- ifelse(stringr::str_detect(df$log, "log"),
"log10", "none")
# get parameter
par <- c(df$var1, df$var2, df$var3, df$var4)
cue_range <- seq(df$low, df$high, by = df$by)
label <- df$label
# get dynamics
dyn <- chabaudi_ci_clean(parameters_cr_1 = par,
parameters_cr_2 = par,
immunity = "tsukushi",
parameters = parameters_tsukushi,
cue_1 = cue_1,
cue_2 = cue_2,
cue_range_1 = cue_range,
cue_range_2 = cue_range,
log_cue_1 = log,
log_cue_2 = log,
solver = "vode",
time_range = seq(0, 30, 0.001),
dyn = T)
dyn2 <- data.frame(dyn, label = rep(label, nrow(dyn)))
# get reaction norm
rn <- par_to_df(par = par, cue_range = cue_range)
# get rug
## if + in cue, need to filter out those variables and add together
if(stringr::str_detect(cue_1, "\\+")){
cue_split <- stringr::str_split(string = cue_1, pattern = "\\+", simplify = T)
## get the two cues
cue_temp_1 <- cue_split[[1]]
cue_temp_2 <- cue_split[[2]]
## filter dyn
rug <- dyn2 %>%
filter(variable == cue_temp_1 | variable == cue_temp_2) %>%
dplyr::group_by(time) %>%
dplyr::mutate(sum = sum(value)) %>%
select(time, value = sum, label)
}
## if sum
if(cue_1 == "sum"){
## filter dyn
rug <- dyn2 %>%
filter(variable == "I1" | variable == "I2" | variable == "Ig1" | variable == "Ig2") %>%
dplyr::group_by(time) %>%
dplyr::mutate(sum = sum(value)) %>%
select(time, value = sum, label)
}
# for simplified cue
if(stringr::str_detect(cue_1, "\\+", negate = T) && cue_1 != "sum"){
rug <- dyn2 %>%
dplyr::filter(variable == cue_1) %>%
dplyr::select(time, value, label)}
# for logged, backtransform cue range of reaction norm
rn2 <- rn
if(stringr::str_detect(log, "log")){rn2$cue_range <- 10^(rn2$cue_range)}
# append label to reaction norm, dyn, and rug
rn2 <- data.frame(rn2, label = rep(label, nrow(rn)))
return(list(dyn2, rn2, rug))
}
read in co_infection opt
ci_opt.df <- read.csv(here("data/ci_opt.csv"))
cue_range_ci <- read.csv(here("data/cue_range_ci.csv"))
# join with cue range
ci_opt.df <- ci_opt.df %>% left_join(select(cue_range_ci, id, low, high, by), by = c("id" = "id"))
# split
ci_opt.ls <- split(ci_opt.df, seq(nrow(ci_opt.df)))
# loop
ci_opt.dyn <- lapply(ci_opt.ls, get_ci_dyn)
# bind together dynamics
dyn.ls <- lapply(ci_opt.dyn, function(x)x[[1]])
ci_dyn.df <- do.call(rbind, dyn.ls)
write_parquet(ci_dyn.df, here("data/ci_dyn/ci_dyn.parquet"))
rn.ls <- lapply(ci_opt.dyn, function(x)x[[2]])
ci_rn.df <- do.call(rbind, rn.ls)
write_parquet(ci_rn.df, here("data/ci_dyn/ci_rn.parquet"))
rug.ls <- lapply(ci_opt.dyn, function(x)x[[3]])
ci_rug.df <- do.call(rbind, rug.ls)
write_parquet(ci_rug.df , here("data/ci_dyn/ci_rug.parquet"))
get in single infection reaction norm to match
si_opt.df <- read.csv(here("data/si_opt.csv"))
cue_range_si <- read.csv(here("data/cue_range_si.csv"))
si_opt.df <- si_opt.df %>% left_join(select(cue_range_si, id, low, high, by, label), by = "id")
# function to get single infection reaction norm
get_si_rn <- function(df){
par <- c(df$var1, df$var2, df$var3, df$var4)
cue_range <- seq(df$low, df$high, by = df$by)
rn <- par_to_df(par = par, cue_range = cue_range)
# for logged, backtransform cue range of reaction norm
rn2 <- rn
if(stringr::str_detect(df$log, "log")){rn2$cue_range <- 10^(rn2$cue_range)}
# append label to reaction norm, dyn, and rug
rn2 <- data.frame(rn2, id = df$id, label = rep(df$label, nrow(rn)))
return(rn2)
}
# rn function and bind together
si_opt.ls <- split(si_opt.df, seq(nrow(si_opt.df)))
si_opt.rn <- lapply(si_opt.ls, get_si_rn)
si_opt.rn2 <- do.call(rbind, si_opt.rn)
get single infeciton rug
si_dyn <- read_parquet(here("data/si_dyn/si_dyn_30.parquet"))
# join to get label
si_dyn.df <- left_join(si_dyn, si_opt.df, by = "id")
# process get rug
get_rug <- function(df){
cue <- unique(df$cue)
if(stringr::str_detect(cue, "\\+")){
cue_split <- stringr::str_split(string = cue, pattern = "\\+", simplify = T)
## get the two cues
cue_temp_1 <- cue_split[[1]]
cue_temp_2 <- cue_split[[2]]
## filter dyn
rug <- df %>%
filter(variable == cue_temp_1 | variable == cue_temp_2) %>%
dplyr::group_by(time) %>%
dplyr::mutate(sum = sum(value)) %>%
select(time, value = sum, label)
}
# for simplified cue
if(stringr::str_detect(cue, "\\+", negate = T)){
rug <- df %>%
dplyr::filter(variable == cue) %>%
dplyr::select(time, value, label)}
return(rug)
}
# split based on individual labels
si_dyn.ls <- si_dyn.df %>% group_split(id)
# run function to get rug
si_rug <- mclapply(si_dyn.ls, get_rug)
si_rug.df <- do.call(rbind, si_rug)
process reaction norm data to limit reaction norm to around the infection space

match single infection rn with coinfection
plot reaction norm
c
function (...) .Primitive("c")
#—————————————–# # plot maximum fitness accumulation for both si and ci #—————————————–#
ez_label <- read.csv(here("data/ez_label.csv"))
process single infection data
# import in si maximum transmission potential
si_dyn.df <- read_parquet(here("data/si_dyn/si_dyn_30.parquet"))
# get maximum tau_cum for 30 days
si_max_tau <- si_dyn.df %>%
filter(variable == "tau_cum") %>%
group_by(id) %>%
summarise(fitness_si = max(value))
process co-infection data
# import in co-infeciton data, 30 days simulation
ci_dyn.df <- read_parquet(here("data/ci_dyn/ci_dyn.parquet"))
# get maximum tau_cum. Note multiplied by 2 to get total fitness for both strains, given that both are sadiptng the same strategy, we consider them the same genotye
ci_max_tau <- ci_dyn.df %>%
filter(variable == "tau_cum1") %>%
group_by(label) %>%
summarise(fitness_ci = max(value, na.rm = T))
get label and join together 2 dataframes
si_ci_max_tau <- ci_max_tau %>%
left_join(ez_label, by = c("label" = "label_ci")) %>%
left_join(si_max_tau, by = c("id_si" = "id"))
plot
ggsave(here("figures/report21/si_ci_fitness.png"), height = 5)
Saving 7 x 5 in image
#—————————————# # time series conversion rate (line form) #—————————————#
single infection
cr_si <- si_dyn.df %>%
filter(variable == "cr")
# good cue bad cue
si_cue_dv <- si_max_tau %>%
mutate(classification = case_when(
fitness_si > 10.5 ~ "High-performing",
fitness_si <= 10.5 ~ "Poor-performing"
))
# join with classificaiton
cr_si.df <- cr_si %>% left_join(si_cue_dv) %>% left_join(ez_label, by = c("id" = "id_si"))
Joining, by = "id"
# split into top erforming and poor-performing cues
cr_si.high <- cr_si.df %>% filter(classification == "High-performing")
cr_si.poor <- cr_si.df %>% filter(classification == "Poor-performing")
# plot
cr_si_pl.pre <- ggplot() +
geom_line(data = cr_si.poor, aes(x= time, y = value, group = id), color = "dark grey") +
labs(color = "Single infection\nhigh-performing cues", x = "Time (days)", y = "Conversion rate") +
theme_bw()+ scale_y_continuous(labels = function(x) format(x, scientific = TRUE, accuracy = 0.1)) +
guides(shape = FALSE)
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
cr_si_pl <- cr_si_pl.pre +
geom_line(data = cr_si.high, aes(x= time, y = value, group = ez_label_si, color = ez_label_si), size = 1) +
scale_color_manual(values=c( "#4575b4", "#fc8d59", "#fdcb44", "#91bfdb"))
co-infection
# get relevent variables
cr_ci <- ci_dyn.df %>%
filter(variable == "cr_1")
# good cue bad cue
ci_cue_dv <- ci_max_tau %>%
mutate(classification = case_when(
fitness_ci > 2.25 ~ "High-performing",
fitness_ci <= 2.25 ~ "Poor-performing"
))
si_cue_dv
# join with classificaiton
cr_ci.df <- cr_ci %>% left_join(ci_cue_dv )%>% left_join(ez_label, by = c("label" = "label_ci"))
# split into top erforming and poor-performing cues
cr_ci.high <- cr_ci.df %>% filter(classification == "High-performing")
cr_ci.poor <- cr_ci.df %>% filter(classification == "Poor-performing")
# plot
cr_ci_pl.pre <- ggplot() +
geom_line(data = cr_ci.poor, aes(x= time, y = value, group = label), color = "dark grey", alpha = 0.5) +
labs(color = "Co-infection\nhigh-performing cues", x = "Time (day)", y = "Conversion rate") +
theme_bw()+ scale_y_continuous(labels = function(x) format(x, scientific = TRUE, accuracy = 0.1)) +
guides(shape = FALSE)
cr_ci_pl <- cr_ci_pl.pre +
geom_line(data = cr_ci.high, aes(x= time, y = value, group = ez_label, color = ez_label), size = 1) +
scale_color_manual(values=c( "#4575b4", "#fc8d59", "#fdcb44", "#91bfdb"))
arrange together
cr_pl <- ggarrange(cr_si_pl, cr_ci_pl, align = "hv", ncol = 1)
#—————————————# # Time series conversion rate (heatmap) #—————————————# note that the order of cues is sorted by descending 30-days fitness. # single infection
cr_hm_si.pl1 <- ggplot() +
geom_tile(data = cr_si.poor, aes(x = time, y = forcats::fct_reorder(ez_label_si, fitness_si), fill = value)) +
labs(x = "Time (days)", y = "Low performing\nsingle infection cues", fill = "Conversion rate") +
scale_fill_viridis_c(limits = c(0, 1)) +
xlim(1, 30) +
theme_bw()
cr_hm_si.pl2 <- ggplot() +
geom_tile(data = cr_si.high, aes(x = time, y = forcats::fct_reorder(ez_label_si, fitness_si), fill = value)) +
labs(x = "", y = "High performing\nsingle infection cues", fill = "Conversion rate") +
scale_fill_viridis_c(limits = c(0, 1)) +
xlim(1, 30) +
theme_bw()
co-infection
cr_hm_ci.pl1 <- ggplot() +
geom_tile(data = cr_ci.poor, aes(x = time, y = forcats::fct_reorder(ez_label, fitness_ci), fill = value)) +
labs(x = "Time (days)", y = "Low performing\nco-infection infection cues", fill = "Conversion rate") +
scale_fill_viridis_c(limits = c(0, 1)) +
xlim(1, 30) +
theme_bw() +
theme(legend.position = "none")
cr_hm_ci.pl2 <- ggplot() +
geom_tile(data = cr_ci.high, aes(x = time, forcats::fct_reorder(ez_label, fitness_ci), fill = value)) +
labs(x = "", y = "High performing\nco-infection cues", fill = "Conversion rate") +
scale_fill_viridis_c(limits = c(0, 1)) +
xlim(1, 30) +
theme_bw() +
theme(legend.position = "none")
cr_hm.pl <- ggarrange(cr_hm_si.pl2, cr_hm_ci.pl2, cr_hm_si.pl1, cr_hm_ci.pl1, align = "hv", ncol = 2, nrow = 2, common.legend = T)
Warning: Removed 6000 rows containing missing values (geom_tile).
Warning: Removed 6000 rows containing missing values (geom_tile).
Warning: Removed 4000 rows containing missing values (geom_tile).
Warning: Removed 10000 rows containing missing values (geom_tile).
Warning: Removed 12000 rows containing missing values (geom_tile).
ggsave(here("figures/report21/conversion_rate_heatmap.png"), height = 5.5, width = 12)

#—————————————# # disase curve #—————————————# note we are using the same 30 days infection dynamics from before
for single infection
note that we are dividing the cues into good and bad based on arbitrary cut off at 10.5

co-infection

# plot both single and co-infeciton disease curve together
disease_curve_pl <- ggarrange(disease_curve_pl.1, disease_curve_pl.2, align = "hv", heights = c(0.75, 1))
Error in ggarrange(disease_curve_pl.1, disease_curve_pl.2, align = "hv", :
object 'disease_curve_pl.1' not found
#———————–# Arranging opt_cue figure #———————–#
ggarrange(opt_cue_pl.A, opt_cue_pl.BC, labels = c("A", ""), ncol = 2, widths = c(0.75, 1))
Warning: ggrepel: 7 unlabeled data points (too many overlaps). Consider increasing max.overlaps

ggarrange(opt_cue_pl.A, opt_cue_pl.BC, labels = c("A", ""), ncol = 2, widths = c(0.75, 1))
Warning: ggrepel: 7 unlabeled data points (too many overlaps). Consider increasing max.overlaps
ggsave(here("figures/report21/opt_cue_final.png"), width = 14, height = 9)

#———————————# # remaking heatmaps #———————————# #———————————-# # static heatmap #———————————-# # processing data
max(static.df4$fitness_diff)
[1] 0.9096023
get heatmap
ggsave(here("figures/report21/static_heatmap.png"), width = 10)
Saving 10 x 7 in image
#————————-# # ci invasion matrix #————————-#
# import and process ci-invasion data
invade.ls <- list.files(here("data/ci_invasion/"), pattern = "*.csv", full.names = T)
invade.ls2 <- lapply(invade.ls, read.csv)
invade.df <- do.call(rbind, invade.ls2)
#write.csv(invade.df, row.names = F, here("data/ci_invasion.csv"))
invade.df <- read.csv(here("data/ci_invasion.csv"))
sanity check. all invasion combination hsould have a fitness higher than static competition
sc_static.df <- static.df %>%
mutate(fitness_static = fitness_1 - fitness_2) %>%
select(id_1, id_2, fitness_static)
# get reverse
sc_static.df2 <- sc_static.df
sc_static.df2$fitness_static <- sc_static.df$fitness_static*-1
names(sc_static.df2) <- c("id_2", "id_1", "fitness_static")
# combine
sc_static.df3 <- rbind(sc_static.df, sc_static.df2)
# left join. none of these are huge and invasion runs for 20 days whereas static competition runs for 30 days. looks good
invade.df %>%
left_join(sc_static.df3, by = c("mut_id"= "id_1", "res_id" = "id_2")) %>%
mutate(invade_static = fitness - fitness_static) %>%
filter(invade_static < 0)
process data for invasion matrix
create summary bar chart

plot togehter
invasion.pl <- ggarrange(invasion.pl1, invasion.pl2, align = "h", common.legend = T, widths = c(2, 1))
ggsave(here("figures/report21/invasion_heatmap.png"))
Saving 7.29 x 4.51 in image

#————————————-# # Effect of various cue perception #————————————-#
#——————————-# # static #——————————-# # logging
combined
static_comb.df <- left_join(
select(static_nocomb, cue_1, label_ci_2, log_1, Self = fitness_diff),
select(static_comb, cue_1, label_ci_2, log_1, fTotal = fitness_diff),
by = c("cue_1", "log_1", "label_ci_2")) %>%
filter(!is.na(Total) & !is.na(Self)) %>%
mutate(classification = ifelse(Total > Self, "Total better", "Self better"))
Error in `h()`:
! Problem with `filter()` input `..1`.
ℹ Input `..1` is `!is.na(Total) & !is.na(Self)`.
x object 'Total' not found
Backtrace:
1. ... %>% ...
8. base::.handleSimpleError(...)
9. dplyr h(simpleError(msg, call))
plot

#——————————–# # invasion #——————————–#
# join invade df with label because I am lazy
invade.df2 <- invade.df %>%
left_join(select(ez_label, id_ci, label_ci_1 = label_ci), by = c("mut_id" = "id_ci"))
log
# get non-logged pairings
invade_nolog <- invade.df2 %>%
mutate(cue_1 = trimws(gsub("\\ .*", "", label_ci_1)),
log_1 = case_when(
str_detect(label_ci_1, "log") ~ "log",
str_detect(label_ci_1, "log", negate = T) ~ "none")) %>%
filter(log_1 == "none")
invade_log <- invade.df2 %>%
mutate(cue_1 = trimws(gsub("\\ .*", "", label_ci_1)),
log_1 = case_when(
str_detect(label_ci_1, "log") ~ "log",
str_detect(label_ci_1, "log", negate = T) ~ "none")) %>%
filter(log_1 == "log")
invade_log.df <- left_join(
select(invade_nolog, cue_1, res_id, log_1, None = fitness),
select(invade_log, cue_1, res_id, log_1, Log = fitness),
by = c("cue_1", "res_id")) %>%
filter(!is.na(None) & !is.na(Log)) %>%
mutate(classification = ifelse(Log > None, "Logged better", "Not logged better"))
combined
invade_nocomb <- invade.df2 %>%
mutate(cue_1 = ifelse(
str_detect(label_ci_1, "sum"), "I+Ig",
trimws(gsub("+1.*|\\ log", "", label_ci_1))
),
log_1 = case_when(
str_detect(label_ci_1, "log") ~ "log",
str_detect(label_ci_1, "log", negate = T) ~ "none"),
comb_1 = case_when(
str_detect(label_ci_1, "1+|sum") ~ "comb",
str_detect(label_ci_1, "1+|sum", negate = T) ~ "none"
)) %>%
filter(comb_1 == "none")
invade_comb <- invade.df2 %>%
mutate(cue_1 = ifelse(
str_detect(label_ci_1, "sum"), "I+Ig",
trimws(gsub("+1.*|\\ log", "", label_ci_1))
),
log_1 = case_when(
str_detect(label_ci_1, "log") ~ "log",
str_detect(label_ci_1, "log", negate = T) ~ "none"),
comb_1 = case_when(
str_detect(label_ci_1, "1+|sum") ~ "comb",
str_detect(label_ci_1, "1+|sum", negate = T) ~ "none"
)) %>%
filter(comb_1 == "comb")
invade_comb.df <- left_join(
select(invade_nocomb, cue_1, res_id, log_1, Self = fitness),
select(invade_comb, cue_1, res_id, log_1, Total = fitness),
by = c("cue_1", "log_1", "res_id")) %>%
filter(!is.na(Total) & !is.na(Self)) %>%
mutate(classification = ifelse(Total > Self, "Total better", "Self better"))
plot. nevermind just assemble in illustrator
invade_cue.pl <- ggarrange(invade_log.pl, invade_comb.pl, align = "h", ncol = 2)
ggsave(here("figures/report21/invade_cue.png"))
Saving 7 x 7 in image
#——————————–# # arrange co-infection figure #——————————–#
ggarrange(static.pl, static_cue.pl, invasion.pl, invade_cue.pl, widths = c(1.5, 1), align = "hv", labels = c("A", "", "B", ""))
ggsave(here("figures/report21/coinfection_final.png"), width = 15, height = 12)
#———————————–# # organization dual cue optimization data #————————————#
check if fitness of the dual cue is better than individual. 17 out of 25 comparisons have lower fitness lol when optimized via dual cue. Not sure what could remedy this. Smaller cue steps?
dual cue improved the fitness of the following combination. note many of these comparisons are made when time steps are different. Need to modify optimization so that time scale is the same. We should also be comparing fitness at 30 days when full resolution is achieved.
- G_log and I_log
- G_none, I_log
- Ig_log and I_log
- Ig_none, I_log
- R_log, I_log
- R_none, I_log
- R_none, Ig_none
I_log is a reaccuring theme here.
dual_cue.df2 %>%
left_join(select(si_opt.df, id, fitness_si = fitness_20), by = c("id" = "id")) %>%
left_join(select(si_opt.df, id, fitness_si_b = fitness_20), by = c("id_b" = "id")) %>%
mutate(fitness_diff = fitness - fitness_si,
fitness_diff_b = fitness - fitness_si_b) %>%
filter(fitness_diff > 0 & fitness_diff_b > 0)
plotting out some interesting heatmaps
source(here("functions/par_to_hm.R"))
# G log and I log
par_to_hm(par = c(-0.535568221, -2.5019109, 5.3812132, 1.8928449, 0.23451243),
cue_range = seq(0, 4.77815125, length.out = 500),
cue_range_b = seq(0, 6.77815125, length.out = 500),
plot = T)

# Ig_log and I_log
par_to_hm(par = c(-0.253533013, -3.1078035, 7.8203134, 0.3603524, -3.51389462),
cue_range = seq(0, 6.77815125, length.out = 500),
cue_range_b = seq(0, 6.77815125,length.out = 500),
plot = T)

dual_cue.df2
# R_log, I_log
par_to_hm(par = c(5.249264033 ,8.9174937, -52.6281549 ,-21.1288712, -13.05080084),
cue_range = seq(6, 7, length.out = 500),
cue_range_b = seq(0, 6.77815125,length.out = 500),
plot = T)
NA

---
title: "R Notebook"
output: html_notebook
---

Exploring dual-cue approach
```{r}
library(dplyr)
library(ggplot2)
library(here)
library(deSolve)
library(crone)
library(optimParallel)
library(doParallel)
library(doRNG)
library(arrow)
library(stringr)
library(parallel)
library(ggpubr)
library(microbenchmark)
library(profvis)
library(RANN)
```

```{r}
parameters_tsukushi <- c(R1 = 8.89*(10^6), # slightly higher
                lambda = 3.7*(10^5),
                mu = 0.025, 
                p = 8*(10^-6), # doubled form original
                alpha = 1, 
                alphag = 2, 
                beta = 5.721, 
                mum = 48, 
                mug = 4, 
                I0 = 43.85965, 
                Ig0 = 0, 
                a = 150, 
                b = 100, 
                sp = 1,
                psin = 16.69234,
                psiw = 0.8431785,
                phin = 0.03520591, 
                phiw = 550.842,
                iota = 2.18*(10^6),
                rho = 0.2627156)

I_range_none <- seq(0, 6*(10^6), by = (6*(10^6))/500)
I_range_log <- seq(0, log10(6*(10^6)), by = (log10(6*(10^6)))/500)
G_range_none <- seq(0, 6*(10^4), by = (6*(10^4))/500)
G_range_log <- seq(0, log10(6*(10^4)), by = (log10(6*(10^4)))/500)
```

# testing out mgcv dual cue spline surface. looks good.
## note that if we go with ti, than we have only interactions between the 2 cues whereas te includes both main effect and interaction. ti gives us 5 parameters to optimze whereas te gives us 9. We should compare the two
```{r}
source(here("functions/chabaudi_si_clean.R"))

# smaller division
cue_range <- seq(0, 6*(10^6), by = (6*(10^6))/500)
cue_range_b <- seq(0, 6*(10^4), by = (6*(10^4))/500)
parameters_cr <- rep(-0.5,9)

cr_grid <- expand.grid(cue_range, cue_range_b)
    ## rename
    names(cr_grid) <- c("cue_range", "cue_range_b")
    ## create dummy y
    dummy_y <- runif(length(cue_range_b), 0, 1)
    ## put together df
    dummy_df <- data.frame(cue_range, cue_range_b, dummy_y)
    ## gam model
    dummy_cr.mod <- mgcv::gam(dummy_y ~ te(cue_range, cue_range_b, 
                                           bs = c("tp", "tp")
                                           k = c(3,3)), 
                              data = dummy_df, 
                              method = "REML")
    ## assign parameters
    dummy_cr.mod$coefficients <- parameters_cr
    
    # exponential transformation to limit conversion rate to between 0 and 1
    cr_fun <- function(cue_1, cue_2){
      res <- exp(-exp(mgcv::predict.gam(dummy_cr.mod, 
                                        newdata = data.frame("cue_range" = cue_1,
                                                             "cue_range_b" = cue_2))))
      return(res)
    }
    

```



# cutting down computational time
```{r}
source(here("functions/chabaudi_si_clean.R"))
source(here("functions/test.R"))

# each run takes around 1 minute (cr_fun) (100 divisions)
# switching to nn2 takes 145 seconds
# increasing divisons to 500 using predict.gam takes still around 57 seconds. So increasing to 500 doesn't seem like a bad idea
system.time(test(parameters_cr = rep(0.5, 5),
       immunity = "tsukushi",
       parameters = parameters_tsukushi,
       time_range = seq(0, 20, 0.001),
       cue = "I",
       cue_b = "G",
       cue_range = I_range_log,
       cue_range_b = G_range_log,
       solver = "vode",
       log_cue = "log10",
       log_cue_b = "log10"
       ))


```

# making shit go fast! Need to cut down on time to get cr from function
```{r}
cue_range <- I_range_log
cue_range_b <- G_range_log
cr_grid <- expand.grid(cue_range, cue_range_b)

## rename
names(cr_grid) <- c("cue_range", "cue_range_b")
## create dummy y
dummy_y <- runif(length(cue_range_b), 0, 1)
## put together df
dummy_df <- data.frame(cue_range, cue_range_b, dummy_y)
## gam model
dummy_cr.mod <- mgcv::bam(dummy_y ~ ti(cue_range, cue_range_b, 
                                           k = c(3,3)), 
                              data = dummy_df)
## assign parameters
dummy_cr.mod$coefficients <- rep(0.5,5)
    
# original function of getting cr
cr_fun <- function(cue_1, cue_2){
      exp(-exp(mgcv::predict.bam(dummy_cr.mod, 
                                 newdata = data.frame("cue_range" = cue_1,
                                                             "cue_range_b" = cue_2),
                                 n.threads = 3)))
  }
    

cr_res <- exp(-exp(mgcv::predict.gam(dummy_cr.mod, 
                                        newdata = cr_grid)))
cr_df <- cbind(cr_grid, cr_res)

cue_1 <- 1
cue_2 <- 0.5

# another function for nearnest neighoubrs
nn_fun <- function(cue_1, cue_2){
  cr_df[nn2(data=cr_grid, query = cbind(cue_1, cue_2), k=1, treetype= "kd",
            searchtype = "priority")[[1]],3]
}

nn_fun2 <- function(cue_1, cue_2){
  cr_df[FNN::knnx.index(data=cr_grid, query = cbind(cue_1, cue_2), k = 1, algorithm = "kd_tree"),3]
}


# very similar
cr_fun(1,1)
nn_fun(1,1)
nn_fun2(1,1)

# gam.predict is faster
microbenchmark(cr_fun(1,1))
microbenchmark(nn_fun(1,1))
microbenchmark(nn_fun2(1,1))
```
# sanity checkz
```{r}
source(here("functions/test.R"))
# switching between cue and cue_b. should not make a difference
# 4.469067
test(parameters_cr = rep(0.5, 5),
       immunity = "tsukushi",
       parameters = parameters_tsukushi,
       time_range = seq(0, 20, 0.01),
       cue = "I",
       cue_b = "G",
       cue_range = I_range_log,
       cue_range_b = G_range_log,
       solver = "vode",
       log_cue = "log10",
       log_cue_b = "log10"
       )

# 4.469066. good
test(parameters_cr = rep(0.5, 5),
       immunity = "tsukushi",
       parameters = parameters_tsukushi,
       time_range = seq(0, 20, 0.01),
       cue = "G",
       cue_b = "I",
       cue_range = G_range_log,
       cue_range_b = I_range_log,
       solver = "vode",
       log_cue = "log10",
       log_cue_b = "log10"
       )

# short time steps vs longer time steps. barely made a difference
# 4.46904
test(parameters_cr = rep(0.5, 5),
       immunity = "tsukushi",
       parameters = parameters_tsukushi,
       time_range = seq(0, 20, 0.001),
       cue = "I",
       cue_b = "G",
       cue_range = I_range_log,
       cue_range_b = G_range_log,
       solver = "vode",
       log_cue = "log10",
       log_cue_b = "log10"
       )

# changing logs on cue_b
# 6.20639
test(parameters_cr = rep(0.5, 5),
       immunity = "tsukushi",
       parameters = parameters_tsukushi,
       time_range = seq(0, 20, 0.01),
       cue = "I",
       cue_b = "G",
       cue_range = I_range_log,
       cue_range_b = G_range_none,
       solver = "vode",
       log_cue = "log10",
       log_cue_b = "none"
       )

# changing logs on cue
# 5.110873
test(parameters_cr = rep(0.5, 5),
       immunity = "tsukushi",
       parameters = parameters_tsukushi,
       time_range = seq(0, 20, 0.01),
       cue = "I",
       cue_b = "G",
       cue_range = I_range_none,
       cue_range_b = G_range_log,
       solver = "vode",
       log_cue = "none",
       log_cue_b = "log10"
       )

# checing single cue infection. same 
## 4.724804
test(parameters_cr = rep(0.5, 4),
       immunity = "tsukushi",
       parameters = parameters_tsukushi,
       time_range = seq(0, 20, 0.01),
       cue = "I",
       cue_range = I_range_none,
       solver = "vode",
       log_cue = "none",
       )
# 4.724804
chabaudi_si_clean(parameters_cr = rep(0.5, 4),
       immunity = "tsukushi",
       parameters = parameters_tsukushi,
       time_range = seq(0, 20, 0.01),
       cue = "I",
       cue_range = I_range_none,
       solver = "vode",
       log_cue = "none",
       )
```

#--------------------------------------------------#
# remaking of co-infection reaction norm
#--------------------------------------------------#
```{r}
source(here("functions/chabaudi_ci_clean.R"))
source(here("functions/par_to_df.R"))
```

# function for getting conversion rate reaction norm and dynamics
```{r}
get_ci_dyn <- function(df){
  
  # get heavisde transformation
  heaviside_trans <- function(cue_range, max){
    crone::heaviside(cue_range)*(cue_range)+(crone::heaviside(cue_range-max)*(max-cue_range))
}
  
  # read in df info
  cue_1 <- ifelse(stringr::str_detect(df$cue, "-i"), 
                  gsub("*-i", "1", df$cue),
                  df$cue)
  
  cue_2 <- ifelse(stringr::str_detect(df$cue, "-i"), 
                  gsub("*-i", "2", df$cue),
                  df$cue)
  
  log <- ifelse(stringr::str_detect(df$log, "log"),
                "log10", "none")
  
  # get parameter
  par <- c(df$var1, df$var2, df$var3, df$var4)
  cue_range <- seq(df$low, df$high, by = df$by)
  label <- df$label
  
  # get dynamics
  dyn <- chabaudi_ci_clean(parameters_cr_1 = par,
                  parameters_cr_2 = par,
                  immunity = "tsukushi",
                  parameters = parameters_tsukushi,
                  cue_1 = cue_1,
                  cue_2 = cue_2,
                  cue_range_1 = cue_range,
                  cue_range_2 = cue_range,
                  log_cue_1 = log,
                  log_cue_2 = log,
                  solver = "vode",
                  time_range = seq(0, 30, 0.001),
                  dyn = T)
  
    dyn2 <- data.frame(dyn, label = rep(label, nrow(dyn)))

  # get reaction norm
  rn <- par_to_df(par = par, cue_range = cue_range)

  # get rug
  ## if + in cue, need to filter out those variables and add together
  if(stringr::str_detect(cue_1, "\\+")){
    cue_split <- stringr::str_split(string = cue_1, pattern = "\\+", simplify = T)
    ## get the two cues 
    cue_temp_1 <- cue_split[[1]]
    cue_temp_2 <- cue_split[[2]]
    
    ## filter dyn
    rug <- dyn2 %>% 
      filter(variable == cue_temp_1 | variable == cue_temp_2) %>% 
      dplyr::group_by(time) %>% 
      dplyr::mutate(sum = sum(value)) %>% 
      select(time, value = sum, label)
  }
  
  ## if sum
  if(cue_1 == "sum"){
    ## filter dyn
    rug <- dyn2 %>% 
      filter(variable == "I1" | variable == "I2" | variable == "Ig1" | variable == "Ig2") %>% 
      dplyr::group_by(time) %>% 
      dplyr::mutate(sum = sum(value)) %>% 
      select(time, value = sum, label)
  }
  
  # for simplified cue
  if(stringr::str_detect(cue_1, "\\+", negate = T) && cue_1 != "sum"){
    rug <- dyn2 %>% 
      dplyr::filter(variable == cue_1) %>% 
      dplyr::select(time, value, label)}
  
  # for logged, backtransform cue range of reaction norm 
  rn2 <- rn
  
  if(stringr::str_detect(log, "log")){rn2$cue_range <- 10^(rn2$cue_range)}

  # append label to reaction norm, dyn, and rug
  rn2 <- data.frame(rn2, label = rep(label, nrow(rn)))

  return(list(dyn2, rn2, rug))
}
```

# read in co_infection opt
```{r}
ci_opt.df <- read.csv(here("data/ci_opt.csv"))
cue_range_ci <- read.csv(here("data/cue_range_ci.csv"))

# join with cue range
ci_opt.df <- ci_opt.df %>% left_join(select(cue_range_ci, id, low, high, by), by = c("id" = "id"))

# split
ci_opt.ls <- split(ci_opt.df, seq(nrow(ci_opt.df)))

# loop
ci_opt.dyn <- lapply(ci_opt.ls, get_ci_dyn)

# bind together dynamics
dyn.ls <- lapply(ci_opt.dyn, function(x)x[[1]])
ci_dyn.df <- do.call(rbind, dyn.ls)
write_parquet(ci_dyn.df, here("data/ci_dyn/ci_dyn.parquet"))

rn.ls <- lapply(ci_opt.dyn, function(x)x[[2]])
ci_rn.df <- do.call(rbind, rn.ls)
write_parquet(ci_rn.df, here("data/ci_dyn/ci_rn.parquet"))

rug.ls <- lapply(ci_opt.dyn, function(x)x[[3]])
ci_rug.df <- do.call(rbind, rug.ls)
write_parquet(ci_rug.df , here("data/ci_dyn/ci_rug.parquet"))
```



# get in single infection reaction norm to match
```{r}
si_opt.df <- read.csv(here("data/si_opt.csv"))
cue_range_si <- read.csv(here("data/cue_range_si.csv"))

si_opt.df <- si_opt.df %>% left_join(select(cue_range_si, id, low, high, by, label), by = "id")

# function to get single infection reaction norm
get_si_rn <- function(df){
  par <- c(df$var1, df$var2, df$var3, df$var4)
  cue_range <- seq(df$low, df$high, by = df$by)
  
  rn <- par_to_df(par = par, cue_range = cue_range)
  
  # for logged, backtransform cue range of reaction norm 
  rn2 <- rn
  if(stringr::str_detect(df$log, "log")){rn2$cue_range <- 10^(rn2$cue_range)}

  # append label to reaction norm, dyn, and rug
  rn2 <- data.frame(rn2, id = df$id, label = rep(df$label, nrow(rn)))
  
  return(rn2)
}
# rn function and bind together
si_opt.ls <- split(si_opt.df, seq(nrow(si_opt.df)))
si_opt.rn <- lapply(si_opt.ls, get_si_rn)
si_opt.rn2 <- do.call(rbind, si_opt.rn)

```

# get single infeciton rug
```{r}
si_dyn <- read_parquet(here("data/si_dyn/si_dyn_30.parquet"))

# join to get label
si_dyn.df <- left_join(si_dyn, si_opt.df, by = "id")

# process get rug
get_rug <- function(df){
  cue <- unique(df$cue)
  
   if(stringr::str_detect(cue, "\\+")){
    cue_split <- stringr::str_split(string = cue, pattern = "\\+", simplify = T)
    ## get the two cues 
    cue_temp_1 <- cue_split[[1]]
    cue_temp_2 <- cue_split[[2]]
    ## filter dyn
    rug <- df %>% 
      filter(variable == cue_temp_1 | variable == cue_temp_2) %>% 
      dplyr::group_by(time) %>% 
      dplyr::mutate(sum = sum(value)) %>% 
      select(time, value = sum, label)
  }
  
  # for simplified cue
  if(stringr::str_detect(cue, "\\+", negate = T)){
    rug <- df %>% 
      dplyr::filter(variable == cue) %>% 
      dplyr::select(time, value, label)}
  
  return(rug)
}

# split based on individual labels
si_dyn.ls <- si_dyn.df %>% group_split(id)

# run function to get rug
si_rug <- mclapply(si_dyn.ls, get_rug)

si_rug.df <- do.call(rbind, si_rug)
```


# process reaction norm data to limit reaction norm to around the infection space
```{r}
# get si_label with ci cue range
si_ci_rug.df <- ci_rug.df %>% 
  mutate(label_si = case_when(
    label %in% c("I", "I1+I2") ~ "I",
    label %in% c("I log","I1+I2 log") ~ "I log",
    label %in% c("Ig", "Ig1+Ig2") ~ "Ig",
    label %in% c("Ig log") ~ "Ig log",
    label %in% c("sum", "I+Ig") ~ "I+Ig",
    label %in% c("sum log", "I+Ig log") ~ "I+Ig log",
    label == "R" ~ "R",
    label == "R log" ~ "R log",
    label %in% c("G", "G1+G2") ~ "G",
    label == "G log" ~ "G log"
  )) 

# get limit for si_rug
si_rug_lim.df <- si_rug.df %>% 
  group_by(label)%>% 
  summarise(min = min(value, na.rm = T)*0.9,
         max = max(value, na.rm = T)*1.1) %>% 
  select(label_si = label, min_si = min, max_si = max)


# filter to restriction conversion rate reaction norm range to cue ranges that appear in rug
## change to Inf/-inf to NA. Note that I am first joining with si rug lim to check which limit is larger, We will go with the cue range that has the largest span
ci_rug_lim.df <- si_ci_rug.df %>% 
  group_by(label) %>% 
  mutate(min = min(value, na.rm = T)*0.9,
         max = max(value, na.rm = T)*1.1) %>% 
  distinct(label, .keep_all = T) %>% 
  select(label, label_si, min, max)

rug_lim.final <- ci_rug_lim.df %>% left_join(si_rug_lim.df, by = "label_si") %>% 
  mutate(final_min = min(min, min_si),
         final_max = max(max, max_si))

# filter ci_rn by limit
ci_rn_lim.df <- ci_rn.df %>% 
  left_join(rug_lim.final) %>% 
  group_by(label) %>% 
  filter(cue_range <= final_max & cue_range >= final_min)

ci_rn_lim.df %>% filter(label == "G log") %>% ggplot() + geom_line(aes(x = cue_range, y = cr))
```

# match single infection rn with coinfection 
```{r}
# get ci label to si rug and filter by limit
si_opt.rn3 <- select(si_opt.rn2, cue_range, cr, label_si = label) %>% 
  left_join(distinct(select(si_ci_rug.df, label, label_si), label, .keep_all = T), by = "label_si") %>% 
  left_join(rug_lim.final) %>% 
  group_by(label) %>% 
  filter(cue_range <= final_max & cue_range >= final_min) %>% 
  select(cue_range, cr, label, label_si)

si_opt.rn3
# get ci label to si rug
si_rug.df2 <- select(si_rug.df, value, label_si = label) %>% 
  left_join(distinct(select(si_ci_rug.df, label, label_si), label, .keep_all = T), by = "label_si")
```

# plot reaction norm
```{r}
# reorder the cues and get proper labels
ez_label <- cbind.data.frame(
  id_ci = c("I-i_none", "I-i_log",
            "I1+I2_none", "I1+I2_log",
            "Ig-i_none", "Ig-i_log",
            "I-i+Ig-i_none", "I-i+Ig-i_log",
            "sum_none", "sum_log",
            "G-i_none", "G-i_log",
            "G1+G2_none", "Ig1+Ig2_none",
            "R_none", "R_log"),
  id_si = c("I_none", "I_log",
            "I_none", "I_log",
            "Ig_none", "Ig_log",
            "I+Ig_none", "I+Ig_log",
            "I+Ig_none", "I+Ig_log",
            "G_none", "G_log",
            "G_none", "Ig_none",
            "R_none", "R_log"),
  label_ci = c("I", "I log",
            "I1+I2", "I1+I2 log",
            "Ig", "Ig log",
            "I+Ig", "I+Ig log",
            "sum", "sum log",
            "G", "G log",
            "G1+G2", "Ig1+Ig2",
            "R", "R log"),
  label_si = c("I", "I log",
               "I", "I log",
               "Ig", "Ig log",
               "I+Ig", "I+Ig log",
               "I+Ig", "I+Ig log",
               "G", "G log",
               "G", "Ig",
               "R", "R log"),
  ez_label = c("Asexual iRBC", "Asexual iRBC log10",
               "Total asexual iRBC", "Total asexual\niRBC log10",
               "Sexual iRBC", "Sexual iRBC log10",
               "Asexual&sexual iRBC", "Asexual&sexual\niRBC log10",
               "Total iRBC", "Total iRBC log10",
               "Gametocyte", "Gametocyte log10",
               "Total gametocyte", "Total sexual iRBC",
               "RBC", "RBC log10"),
  ez_label_si = c("Asexual iRBC", "Asexual iRBC log10",
               "Asexual iRBC", "Asexual iRBC log10",
               "Sexual iRBC", "Sexual iRBC log10",
               "Asexual&sexual iRBC", "Asexual&sexual\niRBC log10",
               "Asexual&sexual iRBC", "Asexual&sexual\niRBC log10",
               "Gametocyte", "Gametocyte log10",
               "Gametocyte", "Sexual iRBC",
               "RBC", "RBC log10")
)

write.csv(ez_label, here("data/ez_label.csv"))

# join with ezlabel
ci_rn_lim.df <- ci_rn_lim.df %>% left_join(ez_label)
si_opt.rn3 <- si_opt.rn3 %>% left_join(ez_label)
ci_rug.df <- ci_rug.df %>% left_join(ez_label)
si_rug.df2 <- si_rug.df2 %>% left_join(ez_label)

# redo order of cues
ci_rn_lim.df$label <- factor(ci_rn_lim.df$label, 
                              levels = c("I", "I log",
                                         "I1+I2", "I1+I2 log",
                                         "Ig", "Ig log",
                                         "I+Ig", "I+Ig log",
                                         "sum", "sum log",
                                         "G", "G log",
                                         "G1+G2", "Ig1+Ig2",
                                         "R", "R log")) 

si_opt.rn3$label <- factor(si_opt.rn3$label, 
                              levels = c("I", "I log",
                                         "I1+I2", "I1+I2 log",
                                         "Ig", "Ig log",
                                         "I+Ig", "I+Ig log",
                                         "sum", "sum log",
                                         "G", "G log",
                                         "G1+G2", "Ig1+Ig2",
                                         "R", "R log"))

ci_rug.df$label <- factor(ci_rug.df$label, 
                              levels = c("I", "I log",
                                         "I1+I2", "I1+I2 log",
                                         "Ig", "Ig log",
                                         "I+Ig", "I+Ig log",
                                         "sum", "sum log",
                                         "G", "G log",
                                         "G1+G2", "Ig1+Ig2",
                                         "R", "R log"))

si_rug.df2$label <- factor(si_rug.df2$label, 
                              levels = c("I", "I log",
                                         "I1+I2", "I1+I2 log",
                                         "Ig", "Ig log",
                                         "I+Ig", "I+Ig log",
                                         "sum", "sum log",
                                         "G", "G log",
                                         "G1+G2", "Ig1+Ig2",
                                         "R", "R log"))

# plot
opt_cue_pl.A <- ggplot() +
  geom_line(data = ci_rn_lim.df, aes(x = cue_range, y = cr, color = "Co-infection")) +
  geom_line(data = si_opt.rn3, aes(x = cue_range, y = cr, color = "Single infection"), linetype = "dashed") +
  geom_rug(data = ci_rug.df, aes(x = value), color = "#4575b4", sides = "t") +
  geom_rug(data = si_rug.df2, aes(x = value), color = "#fc8d59", sides = "b") +
  facet_wrap(~ez_label, scales = "free_x", ncol = 2) +
  theme_bw() +
  labs(y = "Conversion rate", x = "Cue range", color = "Infection") +
  scale_x_continuous(labels = function(x) format(x, scientific = T),
                     guide = guide_axis(check.overlap = TRUE)) + 
                    theme(axis.text.x = element_text(size = 7))  +
  scale_color_manual(values=c( "#4575b4", "#fc8d59"))

ggsave(here("figures/report21/reaction_norm.png"), width = 7, height = 10)

```

#-----------------------------------------#
# plot maximum fitness accumulation for both si and ci
#-----------------------------------------#

```{r}
ez_label <- read.csv(here("data/ez_label.csv"))
```

# process single infection data
```{r}
# import in si maximum transmission potential
si_dyn.df <- read_parquet(here("data/si_dyn/si_dyn_30.parquet"))

# get maximum tau_cum for 30 days
si_max_tau <- si_dyn.df %>% 
  filter(variable == "tau_cum") %>% 
  group_by(id) %>% 
  summarise(fitness_si = max(value))
```

# process co-infection data
```{r}
# import in co-infeciton data, 30 days simulation
ci_dyn.df <- read_parquet(here("data/ci_dyn/ci_dyn.parquet"))

# get maximum tau_cum. Note multiplied by 2 to get total fitness for both strains, given that both are sadiptng the same strategy, we consider them the same genotye
ci_max_tau <- ci_dyn.df %>% 
  filter(variable == "tau_cum1") %>% 
  group_by(label) %>% 
  summarise(fitness_ci = max(value, na.rm = T))
```

# get label and join together 2 dataframes
```{r}
si_ci_max_tau <- ci_max_tau %>% 
  left_join(ez_label, by = c("label" = "label_ci")) %>% 
  left_join(si_max_tau, by = c("id_si" = "id"))
```

# plot
```{r}
opt_cue_pl.B <- ggplot() +
  geom_point(data = si_ci_max_tau, aes(x = fitness_si, y = fitness_ci, color = ez_label_si, shape = ez_label_si), size = 2.5) +
  ggrepel::geom_label_repel(data = si_ci_max_tau, aes(label = ez_label, x = fitness_si, y = fitness_ci),
                            fill = "white",xlim = c(-Inf, Inf), ylim = c(NA, NA)) +
  labs(x = "Optimal single-infection fitness", y = "Optimal Co-infection fitness (per strain)",
       color = "Single infection cue", shape = "Single infection cue") +
  scale_shape_manual(values = 15:24) +
  lims(x = c(8.5, 12.5)) +
  geom_vline(xintercept = 10.5, linetype = "dashed", alpha = 0.5) + # si good cue cutoff
  geom_hline(yintercept = 2.25, linetype = "dashed", alpha = 0.5) + # ci good cue cutoff
  theme_bw() +
  coord_cartesian(clip = "off") +
  theme(plot.margin=margin(r=0))

ggsave(here("figures/report21/si_ci_fitness.png"), height = 5)
```
#---------------------------------------#
# time series conversion rate (line form)
#---------------------------------------#

# single infection
```{r}
cr_si <- si_dyn.df %>% 
  filter(variable == "cr")

# good cue bad cue
si_cue_dv <- si_max_tau %>% 
  mutate(classification = case_when(
    fitness_si > 10.5 ~ "High-performing",
    fitness_si <= 10.5 ~ "Poor-performing"
  ))

# join with classificaiton
cr_si.df <- cr_si %>% left_join(si_cue_dv) %>% left_join(ez_label, by = c("id" = "id_si"))

# split into top erforming and poor-performing cues
cr_si.high <- cr_si.df %>% filter(classification == "High-performing")
cr_si.poor <- cr_si.df %>% filter(classification == "Poor-performing")

# plot
cr_si_pl.pre <- ggplot() +
  geom_line(data = cr_si.poor, aes(x= time, y = value, group = id), color = "dark grey") +
  labs(color = "Single infection\nhigh-performing cues", x = "Time (days)", y = "Conversion rate") +
  theme_bw()+ scale_y_continuous(labels = function(x) format(x, scientific = TRUE, accuracy = 0.1)) +
  guides(shape = FALSE)

cr_si_pl <- cr_si_pl.pre +
  geom_line(data = cr_si.high, aes(x= time, y = value, group = ez_label_si, color = ez_label_si), size = 1) +
  scale_color_manual(values=c( "#4575b4", "#fc8d59", "#fdcb44", "#91bfdb"))

```

# co-infection
```{r}
# get relevent variables
cr_ci <- ci_dyn.df %>% 
  filter(variable == "cr_1")

# good cue bad cue
ci_cue_dv <- ci_max_tau %>% 
  mutate(classification = case_when(
    fitness_ci > 2.25 ~ "High-performing",
    fitness_ci <= 2.25 ~ "Poor-performing"
  ))

si_cue_dv

# join with classificaiton
cr_ci.df <- cr_ci %>% left_join(ci_cue_dv )%>% left_join(ez_label, by = c("label" = "label_ci"))

# split into top erforming and poor-performing cues
cr_ci.high <- cr_ci.df %>% filter(classification == "High-performing")
cr_ci.poor <- cr_ci.df %>% filter(classification == "Poor-performing")

# plot
cr_ci_pl.pre <- ggplot() +
  geom_line(data = cr_ci.poor, aes(x= time, y = value, group = label), color = "dark grey", alpha = 0.5) +
  labs(color = "Co-infection\nhigh-performing cues", x = "Time (day)", y = "Conversion rate") +
  theme_bw()+ scale_y_continuous(labels = function(x) format(x, scientific = TRUE, accuracy = 0.1)) +
  guides(shape = FALSE)

cr_ci_pl <- cr_ci_pl.pre +
  geom_line(data = cr_ci.high, aes(x= time, y = value, group = ez_label, color = ez_label), size = 1) +
  scale_color_manual(values=c( "#4575b4", "#fc8d59", "#fdcb44", "#91bfdb"))
```

# arrange together
```{r}
cr_pl <- ggarrange(cr_si_pl, cr_ci_pl, align = "hv", ncol = 1)
```

#---------------------------------------#
# Time series conversion rate (heatmap)
#---------------------------------------#
note that the order of cues is sorted by descending 30-days fitness.
# single infection
```{r}
cr_hm_si.pl1 <- ggplot() +
  geom_tile(data = cr_si.poor, aes(x = time, y = forcats::fct_reorder(ez_label_si, fitness_si), fill = value)) +
  labs(x = "Time (days)", y = "Low performing\nsingle infection cues", fill = "Conversion rate") +
  scale_fill_viridis_c(limits = c(0, 1)) +
  xlim(1, 30) +
  theme_bw()

cr_hm_si.pl2 <- ggplot() +
  geom_tile(data = cr_si.high, aes(x = time, y = forcats::fct_reorder(ez_label_si, fitness_si), fill = value)) +
  labs(x = "", y = "High performing\nsingle infection cues", fill = "Conversion rate") +
  scale_fill_viridis_c(limits = c(0, 1)) +
  xlim(1, 30) +
  theme_bw()
```

# co-infection
```{r}
cr_hm_ci.pl1 <- ggplot() +
  geom_tile(data = cr_ci.poor, aes(x = time, y = forcats::fct_reorder(ez_label, fitness_ci), fill = value)) +
  labs(x = "Time (days)", y = "Low performing\nco-infection infection cues", fill = "Conversion rate") +
  scale_fill_viridis_c(limits = c(0, 1)) +
  xlim(1, 30) +
  theme_bw() +
  theme(legend.position = "none")

cr_hm_ci.pl2 <- ggplot() +
  geom_tile(data = cr_ci.high, aes(x = time, forcats::fct_reorder(ez_label, fitness_ci), fill = value)) +
  labs(x = "", y = "High performing\nco-infection cues", fill = "Conversion rate") +
  scale_fill_viridis_c(limits = c(0, 1)) +
  xlim(1, 30) +
  theme_bw() +
  theme(legend.position = "none")


cr_hm.pl <- ggarrange(cr_hm_si.pl2, cr_hm_ci.pl2, cr_hm_si.pl1, cr_hm_ci.pl1, align = "hv", ncol = 2, nrow = 2, common.legend = T)

ggsave(here("figures/report21/conversion_rate_heatmap.png"), height = 5.5, width = 12)
```


#---------------------------------------#
# disase curve
#---------------------------------------#
note we are using the same 30 days infection dynamics from before

# for single infection
note that we are dividing the cues into good and bad based on arbitrary cut off at 10.5
```{r}
# get relevent variables
dc_si <- si_dyn.df %>% 
  filter(variable == "I" | variable == "Ig" | variable == "R")

# morph into skinny format
dc_si.df <- tidyr::pivot_wider(dc_si, names_from = variable, values_from = value, id_cols = c(time, id)) %>% 
  mutate(total = I+Ig)

# join with classificaiton
dc_si.df2 <- dc_si.df %>% left_join(si_cue_dv)

# split into top erforming and poor-performing cues
dc_si.high <- dc_si.df2 %>% filter(classification == "High-performing")
dc_si.poor <- dc_si.df2 %>% filter(classification == "Poor-performing")

# join high performing with label
dc_si.high2 <- dc_si.high %>% left_join(ez_label %>% distinct(label_si, .keep_all = T), by = c("id" = "id_si"))

# plot
disease_curve_pl.pre <- ggplot() +
  geom_path(data = dc_si.poor, aes(x= total, y = R, group = id), color = "dark grey", arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  labs(color = "Single infection\nhigh-performing cues", x = "Asexual&sexual iRBC per \u03bcL", y = "RBC per \u03bcL") +
  theme_bw()+ scale_y_continuous(labels = function(x) format(x, scientific = TRUE, accuracy = 0.1)) +
  guides(shape = FALSE)

disease_curve_pl.1 <- disease_curve_pl.pre +
  geom_point(data = dc_si.high2 %>% filter(row_number() %% 1000 ==0), aes(x = total, y = R, color = ez_label, shape = ez_label), size = 2) +
  geom_path(data = dc_si.high2, aes(x= total, y = R, group = ez_label, color = ez_label), size = 1, arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  scale_color_manual(values=c( "#4575b4", "#fc8d59", "#fdcb44", "#91bfdb"))  +
  theme(legend.position = c(0.85, 0.8),
        legend.title = element_text(size = 6.5), 
        legend.text = element_text(size = 6.5)) +
  guides(color = guide_legend(override.aes = list(size = 0.1)))
```

# co-infection
```{r}
# get relevent variables
dc_ci <- ci_dyn.df %>% 
  filter(variable == "I1" | variable == "Ig1" | variable == "R")

# morph into skinny format
dc_ci.df <- tidyr::pivot_wider(dc_ci, names_from = variable, values_from = value, id_cols = c(time, label)) %>% 
  mutate(total = I1+Ig1)

# good cue bad cue
ci_cue_dv <- ci_max_tau %>% 
  mutate(classification = case_when(
    fitness_ci > 4.5 ~ "High-performing",
    fitness_ci <= 4.5 ~ "Poor-performing"
  ))

# join with classificaiton
dc_ci.df2 <- dc_ci.df %>% left_join(ci_cue_dv)

# split into top erforming and poor-performing cues
dc_ci.high <- dc_ci.df2 %>% filter(classification == "High-performing")
dc_ci.poor <- dc_ci.df2 %>% filter(classification == "Poor-performing")

# join high performing with label
dc_ci.high2 <- dc_ci.high %>% left_join(ez_label, by = c("label" = "label_ci"))

# plot
disease_curve_pl.pre2 <- ggplot() +
  geom_path(data = dc_ci.poor, aes(x= total, y = R, group = label), color = "dark grey", arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  labs(color = "Co-infection\nhigh-performing cues", x = "Asexual&sexual iRBC per \u03bcL (per strain)", y = "RBC per \u03bcL") +
  theme_bw()+ scale_y_continuous(labels = function(x) format(x, scientific = TRUE, accuracy = 0.1)) +
  guides(shape = FALSE)

disease_curve_pl.2 <- disease_curve_pl.pre2 +
  geom_point(data = dc_ci.high2 %>% filter(row_number() %% 1000 ==0), aes(x = total, y = R, color = ez_label, shape = ez_label), size = 2) +
  geom_path(data = dc_ci.high2, aes(x= total, y = R, group = ez_label, color = ez_label), size = 1, arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  scale_color_manual(values=c( "#4575b4", "#fc8d59", "#fdcb44", "#91bfdb")) +
  theme(legend.position = c(0.8, 0.8),
        legend.title = element_text(size = 7), 
        legend.text = element_text(size = 7)) +
  guides(color = guide_legend(override.aes = list(size = 0.1)))
```

 # plot both single and co-infeciton disease curve together
```{r}
disease_curve_pl <- ggarrange(disease_curve_pl.1, disease_curve_pl.2, align = "hv")
ggsave(here("figures/report21/disease_curve.png"), width = 12, height = 5)
```



#-----------------------#
Arranging opt_cue figure
#-----------------------#
```{r}
opt_cue_pl.BC <- ggarrange(opt_cue_pl.B, cr_hm.pl, labels = c("B", "C"), ncol = 1, heights = c(0.75, 1))
ggarrange(opt_cue_pl.A, opt_cue_pl.BC, labels = c("A", ""), ncol = 2, widths = c(0.75, 1))

ggsave(here("figures/report21/opt_cue_final.png"), width = 14, height = 9.5)
```



#---------------------------------#
# remaking heatmaps
#---------------------------------#
#----------------------------------#
# static heatmap
#----------------------------------#
# processing data
```{r}
# import in dataset
static.df <- read.csv(here("data/ci_static.csv"))
ez_label <- read.csv(here("data/ez_label.csv"))

# join with labelling
static.df2 <- static.df %>% 
  left_join(select(ez_label, id_ci, label_ci_1 = label_ci), by = c("id_1" = "id_ci")) %>% 
  left_join(select(ez_label, id_ci, label_ci_2 = label_ci), by = c("id_2" = "id_ci")) %>% 
  mutate(fitness_diff = fitness_1 - fitness_2) %>% 
  select(label_ci_1, label_ci_2, fitness_diff)

# get reverse order, which is simply invovles switching the cues around the multiplying the fitness by negative 1
static.df3 <- static.df2
names(static.df3) <- c("label_ci_2", "label_ci_1", "fitness_diff")
static.df3$fitness_diff <- static.df2$fitness_diff * -1

# join
static.df4 <- rbind(static.df2, static.df3)

# get mean
static.mean <- static.df4 %>% 
  group_by(label_ci_1) %>% 
  summarize(mean_fitness = mean(fitness_diff, na.rm = T))
```

# get heatmap
```{r}
# heatmap
static.pl1 <- ggplot(data = static.df4, aes(x = label_ci_2, y = label_ci_1, fill = fitness_diff))+
  geom_tile(color = "black") +
  scale_fill_gradient2(low = "#fc8d59", high = "#4575b4", mid = "white", 
   midpoint = 0, space = "Lab", lim = c(-0.92, 0.92), name="Fitness\ndifference") +
  theme_minimal() +  
  theme(legend.position = "left",
  axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
  panel.grid.major = element_blank(),
  panel.border = element_blank(),
  panel.background = element_blank(),
  text = element_text(size = 15),
  axis.ticks = element_blank(),
  plot.margin=margin(r = 0)) + 
  labs(x = "Strain 2 cue", y = "Strain 1 cue") +
  coord_fixed()

# mean 
static.pl2 <- ggplot() +
  geom_bar(data =  static.mean, aes(y = label_ci_1, x = mean_fitness), stat = "identity") +
  labs(y = "", x = "Mean fitness\ndifference") +
  theme_bw() +
  theme(plot.margin = margin(l = 0),
       panel.grid.major = element_blank(),
  panel.background = element_blank(),
  text = element_text(size = 15),
    axis.text.y=element_blank(),
    axis.ticks.y=element_blank())

static.pl <- ggarrange(static.pl1, static.pl2, align = "hv", widths = c(1, 0.2))
ggsave(here("figures/report21/static_heatmap.png"), width = 10)
```

#-------------------------#
# ci invasion matrix
#-------------------------#
```{r}
# import and process ci-invasion data
invade.ls <- list.files(here("data/ci_invasion/"), pattern = "*.csv", full.names = T)

invade.ls2 <- lapply(invade.ls, read.csv)
invade.df <- do.call(rbind, invade.ls2)
#write.csv(invade.df, row.names = F, here("data/ci_invasion.csv"))
invade.df <- read.csv(here("data/ci_invasion.csv"))
```

# sanity check. all invasion combination hsould have a fitness higher than static competition
```{r}
sc_static.df <- static.df %>% 
  mutate(fitness_static = fitness_1 - fitness_2) %>% 
  select(id_1, id_2, fitness_static)

# get reverse
sc_static.df2 <- sc_static.df
sc_static.df2$fitness_static <- sc_static.df$fitness_static*-1
names(sc_static.df2) <- c("id_2", "id_1", "fitness_static")

# combine
sc_static.df3 <- rbind(sc_static.df, sc_static.df2)

# left join. none of these are huge and invasion runs for 20 days whereas static competition runs for 30 days. looks good
invade.df %>% 
  left_join(sc_static.df3, by = c("mut_id"= "id_1", "res_id" = "id_2")) %>% 
  mutate(invade_static = fitness - fitness_static) %>% 
  filter(invade_static < 0)
```

# process data for invasion matrix
```{r}
invade.mat <- invade.df %>% 
  group_by(V1 = pmin(mut_id, res_id), V2 = pmax(mut_id, res_id)) %>% # group by cue competition, irregardless of order
  mutate(id_alt = paste0(V1, V2),
         invade = case_when(
           fitness > 0 ~ "invade",
           fitness < 0 ~ "not invade"
         )) %>% 
  group_by(id_alt) %>% 
  mutate(
    mut_is_V1 = case_when(
    mut_id == V1 ~ "V1_invade",
    mut_id != V1 ~ "V1_invaded")) %>% 
  arrange(id_alt) %>% 
  select(fitness, V1, V2, id_alt, invade, mut_is_V1) %>% 
  tidyr::pivot_wider(names_from = mut_is_V1, values_from = fitness) %>% 
  group_by(id_alt) %>% 
  mutate(V1_invade2 = gsub("NA", "", paste0(V1_invade, collapse = "")),
         V1_invaded2 = gsub("NA", "", paste0(V1_invaded, collapse = ""))) %>% 
  distinct(id_alt, .keep_all = T) %>% 
  mutate(
    category = case_when(
    V1_invade2 > 0 & V1_invaded2 > 0 ~ "Mutual invasion",
    V1_invade2 > 0 & V1_invaded2 < 0 ~ "Only strain 1 invasion",
    V1_invade2 < 0 & V1_invaded2 > 0 ~ "Only strain 2 invasion",
    V1_invade2 < 0 & V1_invaded2 < 0 ~ "Mutual non-invasion"
  )) %>% 
  select(V1, V2, invasion = category)

# for plotting, need to get all same cue vs same cue, which we will set to NA
invade.NA <- cbind.data.frame(`V1` = unique(invade.mat$V1),
      `V2` = unique(invade.mat$V1),
      invasion = NA)

invade.mat2 <- rbind(invade.mat, invade.NA)

# get label
invade.mat3 <- invade.mat2 %>% 
  left_join(select(ez_label, id_ci, V1_label = label_ci), by = c("V1" = "id_ci")) %>% 
  left_join(select(ez_label, id_ci, V2_label = label_ci), by = c("V2" = "id_ci"))

# reorder so that ggplot do not mess up
library(gtools)

levels <- mixedsort(unique(c(invade.mat3$V1_label, invade.mat3$V2_label)))

invade.mat4 <- rbind(invade.mat3, 
                        invade.mat3 %>% 
                          rename(V2_label = V1_label, V1_label = V2_label) %>% 
                          select(V1_label, V2_label, invasion)) %>%
  mutate(across(ends_with("label"),
                ~factor(.x, levels = levels))) %>%
  filter(as.integer(V1_label) < as.integer(V2_label)) %>%
  mutate(V2_label = forcats::fct_rev(V2_label))

# plot
invasion.pl1 <- ggplot(data = invade.mat4, aes(x = V2_label, y = V1_label, fill = invasion)) +
  geom_tile(color = "black") +
  theme_minimal() +  
  theme(legend.position = "right",
  axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
  panel.grid.major = element_blank(),
  panel.border = element_blank(),
  panel.background = element_blank(),
  text = element_text(size = 15),
  axis.ticks = element_blank(),
  plot.margin=margin(r = 0)) + 
  labs(fill = "Invasion", x = "Strain 2 cue", y = "Strain 1 cue") +
  #scale_x_discrete(limits = rev) +
  scale_y_discrete(limits = rev) +
  scale_fill_manual(values = c("Only strain 1 invasion" = "#4575b4",
                               "Only strain 2 invasion" = "#fc8d59",
                               "Mutual invasion" = "#fee090")) +
  theme(legend.position = "none")
```

# create summary bar chart
```{r}
# create a stacked barchart for summary
## filter out na
invade.matalt <- invade.mat3 %>% na.exclude()

# get frquency from both sides. Note when grouping for V2, from the perspective of cue 2, scenarrio when strain 2 invade = strain 1 invade
invade.matalt1 <- invade.matalt %>% group_by(V1_label, invasion) %>% 
  summarize(frequency_1 = n())

invade.matalt2 <- invade.matalt %>%
  mutate(invasion_alt = case_when(
    invasion == "Only strain 1 invasion" ~ "Only strain 2 invasion",
    invasion == "Only strain 2 invasion" ~ "Only strain 1 invasion",
    invasion == "Mutual invasion" ~ "Mutual invasion",
    invasion == "Mutual non-invasion" ~ "Mutual non-invasion"
  )) %>% 
  group_by(V2_label, invasion_alt) %>% 
  summarize(frequency_2 = n())     

# full join and sum. has confirmed all of them add up to 14 
invade.matalt3 <- full_join(invade.matalt1, invade.matalt2, by = c("V1_label" = "V2_label", "invasion" = "invasion_alt"))

invade.matalt3[is.na(invade.matalt3)] <- 0
invade.matalt4 <- invade.matalt3 %>% 
  mutate(freq = frequency_1 + frequency_2) %>% 
  mutate(temp = case_when(
    invasion == "Only strain 1 invasion" ~ freq
  )) %>% 
  group_by(V1_label) %>% 
  mutate(invade_1_freq = max(temp, na.rm = T))
  
invasion.pl2 <- ggplot() +
  geom_bar(data = invade.matalt4, aes(x = freq, y = reorder(V1_label, invade_1_freq), fill = forcats::fct_rev(factor(invasion, levels = c("Only strain 1 invasion", "Only strain 2 invasion", "Mutual invasion", "Mutual non-invasion")))), stat = "identity") +
  labs(x = "Frequency", fill = "Invasion", y = "Strain 1 cue") +
  theme_bw()  +
  scale_fill_manual(values = c("Only strain 1 invasion" = "#4575b4",
                               "Only strain 2 invasion" = "#fc8d59",
                               "Mutual invasion" = "#fee090")) +
  theme(text = element_text(size = 15))
```

# plot togehter
```{r}
invasion.pl <- ggarrange(invasion.pl1, invasion.pl2, align = "h", common.legend = T, widths = c(2, 1))
ggsave(here("figures/report21/invasion_heatmap.png"))
```


#-------------------------------------#
# Effect of various cue perception
#-------------------------------------#

#-------------------------------#
# static
#-------------------------------#
# logging
```{r}
# get non-logged pairings
static_nolog <- static.df2 %>% 
  mutate(cue_1 = trimws(gsub("\\ .*", "", label_ci_1)),
         log_1 = case_when(
           str_detect(label_ci_1, "log") ~ "log",
           str_detect(label_ci_1, "log", negate = T) ~ "none")) %>% 
  filter(log_1 == "none")


static_log <- static.df2 %>% 
  mutate(cue_1 = trimws(gsub("\\ .*", "", label_ci_1)),
         log_1 = case_when(
           str_detect(label_ci_1, "log") ~ "log",
           str_detect(label_ci_1, "log", negate = T) ~ "none")) %>% 
  filter(log_1 == "log")

static_log.df <- left_join(
  select(static_nolog, cue_1, label_ci_2, log_1, None = fitness_diff),
  select(static_log, cue_1, label_ci_2, log_1, Log = fitness_diff),
  by = c("cue_1", "label_ci_2")) %>% 
  filter(!is.na(None) & !is.na(Log)) %>% 
  mutate(classification = ifelse(Log > None, "Logged better", "Not logged better"))
```

# combined
```{r}
static_nocomb <- static.df2 %>% 
  mutate(cue_1 = ifelse(
    str_detect(label_ci_1, "sum"), "I+Ig",
    trimws(gsub("+1.*|\\ log", "", label_ci_1))
  ),
  log_1 = case_when(
           str_detect(label_ci_1, "log") ~ "log",
           str_detect(label_ci_1, "log", negate = T) ~ "none"),
    comb_1 = case_when(
    str_detect(label_ci_1, "1+|sum") ~ "comb",
    str_detect(label_ci_1, "1+|sum", negate = T) ~ "none" 
  )) %>% 
  filter(comb_1 == "none")

static_comb <- static.df2 %>% 
  mutate(cue_1 = ifelse(
    str_detect(label_ci_1, "sum"), "I+Ig",
    trimws(gsub("+1.*|\\ log", "", label_ci_1))
  ),
  log_1 = case_when(
           str_detect(label_ci_1, "log") ~ "log",
           str_detect(label_ci_1, "log", negate = T) ~ "none"),
    comb_1 = case_when(
    str_detect(label_ci_1, "1+|sum") ~ "comb",
    str_detect(label_ci_1, "1+|sum", negate = T) ~ "none" 
  )) %>% 
  filter(comb_1 == "comb")
  
static_comb.df <- left_join(
  select(static_nocomb, cue_1, label_ci_2, log_1, Self = fitness_diff),
  select(static_comb, cue_1, label_ci_2, log_1, Total = fitness_diff),
  by = c("cue_1", "log_1", "label_ci_2")) %>% 
  filter(!is.na(Total) & !is.na(Self)) %>% 
  mutate(classification = ifelse(Total > Self, "Total better", "Self better"))
```

# plot
```{r}
static_log.pl <- ggpaired(static_log.df, cond1 = "None", cond2 = "Log", line.color = "classification", alpha = 0.5) +
  labs(x = "Cue perception", y = "Fitness difference", color = "Effect") +
  scale_color_manual(values = c("Not logged better" = "#fc8d59", "Logged better" = "#4575b4"))

static_comb.pl <- ggpaired(static_comb.df, cond1 = "Total", cond2 = "Self", line.color = "classification", alpha = 0.5) +
  labs(x = "Cue perception", y = "Fitness difference", color = "Effect") +
  scale_color_manual(values = c("Total better" = "#fc8d59", "Self better" = "#4575b4"))

static_cue.pl <- ggarrange(static_log.pl, static_comb.pl, ncol = 2, align = "h")
ggsave(here("figures/report21/static_cue.png"), width = 7.5, height = 4)
```

#--------------------------------#
# invasion
#--------------------------------#
```{r}
# join invade df with label because I am lazy
invade.df2 <- invade.df %>% 
  left_join(select(ez_label, id_ci, label_ci_1 = label_ci), by = c("mut_id" = "id_ci"))
```

# log
```{r}
# get non-logged pairings
invade_nolog <- invade.df2 %>% 
  mutate(cue_1 = trimws(gsub("\\ .*", "", label_ci_1)),
         log_1 = case_when(
           str_detect(label_ci_1, "log") ~ "log",
           str_detect(label_ci_1, "log", negate = T) ~ "none")) %>% 
  filter(log_1 == "none")


invade_log <- invade.df2 %>% 
  mutate(cue_1 = trimws(gsub("\\ .*", "", label_ci_1)),
         log_1 = case_when(
           str_detect(label_ci_1, "log") ~ "log",
           str_detect(label_ci_1, "log", negate = T) ~ "none")) %>% 
  filter(log_1 == "log")

invade_log.df <- left_join(
  select(invade_nolog, cue_1, res_id, log_1, None = fitness),
  select(invade_log, cue_1, res_id, log_1, Log = fitness),
  by = c("cue_1", "res_id")) %>% 
  filter(!is.na(None) & !is.na(Log)) %>% 
  mutate(classification = ifelse(Log > None, "Logged better", "Not logged better"))
```

# combined
```{r}
invade_nocomb <- invade.df2 %>% 
  mutate(cue_1 = ifelse(
    str_detect(label_ci_1, "sum"), "I+Ig",
    trimws(gsub("+1.*|\\ log", "", label_ci_1))
  ),
  log_1 = case_when(
           str_detect(label_ci_1, "log") ~ "log",
           str_detect(label_ci_1, "log", negate = T) ~ "none"),
    comb_1 = case_when(
    str_detect(label_ci_1, "1+|sum") ~ "comb",
    str_detect(label_ci_1, "1+|sum", negate = T) ~ "none" 
  )) %>% 
  filter(comb_1 == "none")

invade_comb <- invade.df2 %>% 
  mutate(cue_1 = ifelse(
    str_detect(label_ci_1, "sum"), "I+Ig",
    trimws(gsub("+1.*|\\ log", "", label_ci_1))
  ),
  log_1 = case_when(
           str_detect(label_ci_1, "log") ~ "log",
           str_detect(label_ci_1, "log", negate = T) ~ "none"),
    comb_1 = case_when(
    str_detect(label_ci_1, "1+|sum") ~ "comb",
    str_detect(label_ci_1, "1+|sum", negate = T) ~ "none" 
  )) %>% 
  filter(comb_1 == "comb")
  
invade_comb.df <- left_join(
  select(invade_nocomb, cue_1, res_id, log_1, Self = fitness),
  select(invade_comb, cue_1, res_id, log_1, Total = fitness),
  by = c("cue_1", "log_1", "res_id")) %>% 
  filter(!is.na(Total) & !is.na(Self)) %>% 
  mutate(classification = ifelse(Total > Self, "Total better", "Self better"))
```

# plot. nevermind just assemble in illustrator
```{r}
invade_log.pl <- ggpaired(invade_log.df, cond1 = "None", cond2 = "Log", line.color = "classification", alpha = 0.5) +
  labs(x = "Cue perception", y = "Fitness difference", color = "Effect") +
  scale_color_manual(values = c("Not logged better" = "#fc8d59", "Logged better" = "#4575b4"))

invade_comb.pl <- ggpaired(invade_comb.df, cond1 = "Total", cond2 = "Self", line.color = "classification", alpha = 0.5) +
  labs(x = "Cue perception", y = "Fitness difference", color = "Effect") +
  scale_color_manual(values = c("Total better" = "#fc8d59", "Self better" = "#4575b4"))

invade_cue.pl <- ggarrange(invade_log.pl, invade_comb.pl, align = "h", ncol = 2)
ggsave(here("figures/report21/invade_cue.png"), width = 7.5, height = 4)
```


#--------------------------------#
# arrange co-infection figure
#--------------------------------#
```{r}
ggarrange(static.pl, static_cue.pl, invasion.pl, invade_cue.pl, widths = c(1.5, 1), align = "hv", labels = c("A", "", "B", ""))

ggsave(here("figures/report21/coinfection_final.png"), width = 15, height = 12)
```

#-----------------------------------#
# organization dual cue optimization data
#------------------------------------#
```{r}
dual_cue.ls <- list.files(here("data/dual_cue_opt/"), pattern  = "*.csv", full.names = T)

dual_cue.ls2 <- lapply(dual_cue.ls, read.csv)

dual_cue.df <- do.call(rbind, dual_cue.ls2)

# filter out those that did not optimize properly
dual_cue.df2 <- dual_cue.df %>% filter(fitness > 1)
```

# check if fitness of the dual cue is better than individual. 17 out of 25 comparisons have lower fitness lol when optimized via dual cue. Not sure what could remedy this. Smaller cue steps?

# dual cue improved the fitness of the following combination. note many of these comparisons are made when time steps are different. Need to modify optimization so that time scale is the same. We should also be comparing fitness at 30 days when full resolution is achieved. 
1) G_log and I_log
2) G_none, I_log
3) Ig_log and I_log
4) Ig_none, I_log
5) R_log, I_log
6) R_none, I_log
7) R_none, Ig_none

I_log is a reaccuring theme here.
```{r}
dual_cue.df2 %>% 
  left_join(select(si_opt.df, id, fitness_si = fitness_20), by = c("id" = "id")) %>% 
  left_join(select(si_opt.df, id, fitness_si_b = fitness_20), by = c("id_b" = "id")) %>% 
  mutate(fitness_diff = fitness - fitness_si,
         fitness_diff_b = fitness - fitness_si_b) %>% 
  filter(fitness_diff > 0 & fitness_diff_b > 0)
```


# plotting out some interesting heatmaps
```{r}
source(here("functions/par_to_hm.R"))

# G log and I log
par_to_hm(par = c(-0.535568221,	-2.5019109,	5.3812132,	1.8928449,	0.23451243),
          cue_range = seq(0,	4.77815125, length.out = 500),
          cue_range_b = seq(0,	6.77815125,	length.out = 500),
          plot = T)


# Ig_log and I_log
par_to_hm(par = c(-0.253533013,	-3.1078035,	7.8203134,	0.3603524,	-3.51389462),
          cue_range = seq(0,	6.77815125, length.out = 500),
          cue_range_b = seq(0,	6.77815125,length.out = 500),
          plot = T)

# R_log, I_log
par_to_hm(par = c(5.249264033	,8.9174937,	-52.6281549	,-21.1288712,	-13.05080084),
          cue_range = seq(6,	7,	length.out = 500),
          cue_range_b = seq(0,	6.77815125,length.out = 500),
          plot = T)

```







